﻿// ------------------------------------------------------------------------------------------------
//  <copyright file="StopWords.cs" company="Iveely">
//    Copyright (c) Iveely Liu.  All rights reserved.
//  </copyright>
//  
//  <Create Time>
//    03/02/2013 21:59 
//  </Create Time>
//  
//  <contact owner>
//    liufanping@iveely.com 
//  </contact owner>
//  -----------------------------------------------------------------------------------------------

#region

using System.IO;
using IveelySE.Common;

#endregion

namespace IveelySE.Liquidate
{
    /// <summary>
    ///   Stop-word filter. Keeps the hashes of the words listed in "StopWord.txt" and
    ///   removes matching entries from arrays of tokenized keywords.
    /// </summary>
    public class StopWords
    {
        /// <summary>
        ///   Hashes (as produced by <c>Function.Hash</c>) of every loaded stop word.
        ///   Only the hashes are stored, not the words themselves.
        /// </summary>
        private static readonly SortedList<int> List = new SortedList<int>();

        /// <summary>
        ///   Reads "StopWord.txt" (one word per line; the relative path is resolved against the
        ///   current working directory) and stores the hash of every line.
        ///   Does nothing when the collection already holds at least one entry.
        /// </summary>
        public static void Load()
        {
            // Already populated: do not read the file again.
            if(List.Count >= 1)
            {
                return;
            }

            foreach(string word in File.ReadAllLines("StopWord.txt"))
            {
                // NOTE(review): BinarySearch in Process relies on the project's SortedList<T>
                // keeping its items ordered on Add - confirm against its implementation.
                List.Add(Function.Hash(word));
            }
        }

        /// <summary>
        ///   Removes stop words and blank entries from a tokenized keyword array.
        /// </summary>
        /// <remarks>
        ///   This method does not call <see cref="Load" /> itself; only hashes already present
        ///   in the collection are filtered. Matching is done on hashes, so a keyword whose hash
        ///   collides with the hash of a stop word is removed as well.
        /// </remarks>
        /// <param name="keys"> Keywords produced by word segmentation. May be null. </param>
        /// <returns>
        ///   The keywords that are neither null/whitespace-only nor stop words. The result is
        ///   collected in the project's SortedList&lt;string&gt;, so its order is whatever that
        ///   collection yields (presumably sorted rather than input order - confirm).
        ///   A null <paramref name="keys" /> is treated like an empty array.
        /// </returns>
        public static string[] Process(string[] keys)
        {
            var result = new SortedList<string>();
            if(keys == null)
            {
                // Same result as for an empty input array instead of a NullReferenceException.
                return result.ToArray();
            }

            foreach(string key in keys)
            {
                // Null entries used to crash on key.Trim(); skip them together with blank ones.
                if(string.IsNullOrWhiteSpace(key))
                {
                    continue;
                }

                // A negative search result means the hash is not among the stop words.
                if(List.BinarySearch(Function.Hash(key)) < 0)
                {
                    result.Add(key);
                }
            }
            return result.ToArray();
        }
    }
}